{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 6 Pandas的函数应用"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Numpy ufunc 函数，randn跟的是维数\n",
    "df = pd.DataFrame(np.random.randn(5,4) - 1,columns=list('ABCD'))#生成5行4列的dataframe，且对每个元素都减1\n",
    "print(df)\n",
    "\n",
    "print(np.abs(df)) #绝对值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:31:18.210662Z",
     "start_time": "2025-01-07T13:31:18.203703Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          A         B         C         D\n",
      "0 -1.740379  0.143124  0.029666 -2.615897\n",
      "1 -1.199329 -1.606652 -0.361750 -1.493028\n",
      "2 -3.177121 -1.971403 -0.522001 -0.456429\n",
      "3 -0.175872  0.558880 -1.813727  0.522118\n",
      "4 -1.149097  0.433193 -1.739016 -0.942711\n",
      "          A         B         C         D\n",
      "0  1.740379  0.143124  0.029666  2.615897\n",
      "1  1.199329  1.606652  0.361750  1.493028\n",
      "2  3.177121  1.971403  0.522001  0.456429\n",
      "3  0.175872  0.558880  1.813727  0.522118\n",
      "4  1.149097  0.433193  1.739016  0.942711\n"
     ]
    }
   ],
   "execution_count": 2
  },
  {
   "cell_type": "code",
   "source": [
    "#apply默认作用在列上,x是每一列,因为axis=0（即行会被忽略）\n",
    "print(df.apply(lambda x : x.max()))#lambda是传参的形式,x是每一列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:31:22.984565Z",
     "start_time": "2025-01-07T13:31:22.979578Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "A   -0.175872\n",
      "B    0.558880\n",
      "C    0.029666\n",
      "D    0.522118\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "source": [
    "#apply作用在行上\n",
    "print(df.apply(lambda x : x.max(), axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:32:30.315872Z",
     "start_time": "2025-01-07T13:32:30.312154Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.143124\n",
      "1   -0.361750\n",
      "2   -0.456429\n",
      "3    0.558880\n",
      "4    0.433193\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "# 使用applymap应用到每个数据\n",
    "print(df.map(lambda x : '%.2f' % x))\n",
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:32:51.043176Z",
     "start_time": "2025-01-07T13:32:51.034192Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       A      B      C      D\n",
      "0  -1.74   0.14   0.03  -2.62\n",
      "1  -1.20  -1.61  -0.36  -1.49\n",
      "2  -3.18  -1.97  -0.52  -0.46\n",
      "3  -0.18   0.56  -1.81   0.52\n",
      "4  -1.15   0.43  -1.74  -0.94\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "A    float64\n",
       "B    float64\n",
       "C    float64\n",
       "D    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "source": [
    "type('%.2f' % 1.3456)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:33:03.294594Z",
     "start_time": "2025-01-07T13:33:03.286861Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "str"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "markdown",
   "source": "## 6.4 索引排序（不重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Series\n",
    "print(np.random.randint(5, size=5))\n",
    "print('-'*50)\n",
    "s4 = pd.Series(range(10, 15), index = np.random.randint(5, size=5)) #索引随机生成\n",
    "print(s4)\n",
    "print('-'*50)\n",
    "# 索引排序,sort_index返回一个新的排好索引的series\n",
    "print(s4.sort_index())\n",
    "print(s4)\n",
    "# s4.loc[0:3]  loc索引值不唯一时直接报错\n",
    "print(s4.iloc[0:3])\n",
    "s4[0:3]  #默认用的位置索引"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:33:20.400572Z",
     "start_time": "2025-01-07T13:33:20.384603Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 3 0 1 2]\n",
      "--------------------------------------------------\n",
      "0    10\n",
      "1    11\n",
      "3    12\n",
      "2    13\n",
      "0    14\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "0    10\n",
      "0    14\n",
      "1    11\n",
      "2    13\n",
      "3    12\n",
      "dtype: int64\n",
      "0    10\n",
      "1    11\n",
      "3    12\n",
      "2    13\n",
      "0    14\n",
      "dtype: int64\n",
      "0    10\n",
      "1    11\n",
      "3    12\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    10\n",
       "1    11\n",
       "3    12\n",
       "dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# s4.loc[1:2] #loc索引值唯一时可以切片"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df4 = pd.DataFrame(np.random.randn(5, 5),\n",
    "                   index=np.random.randint(5, size=5),\n",
    "                   columns=np.random.randint(5, size=5))\n",
    "print(df4)\n",
    "#轴零是行索引排序\n",
    "df4_isort = df4.sort_index(axis=0, ascending=False)\n",
    "print(df4_isort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:33:24.690487Z",
     "start_time": "2025-01-07T13:33:24.681352Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          2         1         3         1         1\n",
      "0 -0.598107  0.386675  0.940453  1.143036 -0.221983\n",
      "1  0.792275 -0.395673 -1.082200  1.014383  1.405390\n",
      "1  2.353458  1.250692  0.422466  0.284185  0.696445\n",
      "0 -0.105652 -1.640833 -0.047400 -0.265767 -0.163336\n",
      "3  0.126646 -0.533236  0.685059  2.210881 -0.364784\n",
      "          2         1         3         1         1\n",
      "3  0.126646 -0.533236  0.685059  2.210881 -0.364784\n",
      "1  2.353458  1.250692  0.422466  0.284185  0.696445\n",
      "1  0.792275 -0.395673 -1.082200  1.014383  1.405390\n",
      "0 -0.598107  0.386675  0.940453  1.143036 -0.221983\n",
      "0 -0.105652 -1.640833 -0.047400 -0.265767 -0.163336\n"
     ]
    }
   ],
   "execution_count": 8
  },
  {
   "cell_type": "code",
   "source": [
    "#轴1是列索引排序\n",
    "df4_isort = df4.sort_index(axis=1, ascending=True)\n",
    "print(df4_isort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T13:33:28.669483Z",
     "start_time": "2025-01-07T13:33:28.654921Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1         1         1         2         3\n",
      "0  0.386675  1.143036 -0.221983 -0.598107  0.940453\n",
      "1 -0.395673  1.014383  1.405390  0.792275 -1.082200\n",
      "1  1.250692  0.284185  0.696445  2.353458  0.422466\n",
      "0 -1.640833 -0.265767 -0.163336 -0.105652 -0.047400\n",
      "3 -0.533236  2.210881 -0.364784  0.126646  0.685059\n"
     ]
    }
   ],
   "execution_count": 9
  },
  {
   "cell_type": "markdown",
   "source": "# 6.5 按值排序（机器学习，深度学习不重要，数据分析才需要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 按值排序,by后是column的值\n",
    "import random\n",
    "l=[random.randint(0,100) for i in range(24)] #生成24个随机数\n",
    "df4 = pd.DataFrame(np.array(l).reshape(6,4)) #生成6行4列的dataframe\n",
    "# print(df4) #查看数据,ndarray\n",
    "# print('-'*50)\n",
    "print(df4)\n",
    "print('-'*50)\n",
    "#按轴零排序，by后是列名,交换的是行\n",
    "df4_vsort = df4.sort_values(by=3,axis=0, ascending=False) #寻找的是columns里的3,重要，ascend=False表示降序，对第三列的值进行排序\n",
    "print(df4_vsort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:33:38.933794Z",
     "start_time": "2025-01-07T13:33:38.926011Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "     0   1   2   3\n",
      "0   82  62  17  63\n",
      "1   17  73  22  76\n",
      "2   68  73  13  13\n",
      "3   39  37  69  81\n",
      "4  100  36  75  67\n",
      "5   50  31  33  62\n",
      "--------------------------------------------------\n",
      "     0   1   2   3\n",
      "3   39  37  69  81\n",
      "1   17  73  22  76\n",
      "4  100  36  75  67\n",
      "0   82  62  17  63\n",
      "5   50  31  33  62\n",
      "2   68  73  13  13\n"
     ]
    }
   ],
   "execution_count": 10
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    2   0   1    3\n",
      "0  58  65  21   61\n",
      "1  35  88  25   86\n",
      "2  63  65  28    5\n",
      "3  95  64  45   23\n",
      "4  57   0  92  100\n",
      "5  89  52  57   24\n"
     ]
    }
   ],
   "source": [
    "#按轴1排序，by后行索引名，交换的是列\n",
    "df4_vsort = df4.sort_values(by=3,axis=1, ascending=False) #寻找的是index里的3\n",
    "print(df4_vsort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-11T03:36:15.358985700Z",
     "start_time": "2024-04-11T03:36:15.309976600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": "# 6.6 处理缺失数据（重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "df_data = pd.DataFrame([np.random.randn(3), [1., 2., np.nan],\n",
    "                       [np.nan, 4., np.nan], [1., 2., 3.]])\n",
    "print(df_data.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:35:22.883504Z",
     "start_time": "2025-01-07T13:35:22.877694Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.364324 -0.087416 -0.914237\n",
      "1  1.000000  2.000000       NaN\n",
      "2       NaN  4.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 11
  },
  {
   "cell_type": "code",
   "source": [
    "df_data.iloc[2,0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T13:35:24.680049Z",
     "start_time": "2025-01-07T13:35:24.676165Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(nan)"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 12
  },
  {
   "cell_type": "code",
   "source": [
    "#isnull来判断是否有空的数据(即NaN)\n",
    "print(df_data.isnull())#判断每个元素是否为空\n",
    "print(df_data.isnull().iloc[3,0])#对第3行第1列的数据进行判断是否为空"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:38:20.582836Z",
     "start_time": "2025-01-07T13:38:20.577988Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2\n",
      "0  False  False  False\n",
      "1  False  False   True\n",
      "2   True  False   True\n",
      "3  False  False  False\n",
      "False\n"
     ]
    }
   ],
   "execution_count": 19
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T13:38:31.109440Z",
     "start_time": "2025-01-07T13:38:31.104422Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#帮我计算df_data缺失率\n",
    "print(df_data.isnull().sum()/len(df_data))#计算每个元素是否为空的概率"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.25\n",
      "1    0.00\n",
      "2    0.50\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 20
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 删除缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#默认一个样本，任何一个特征缺失，就删除\n",
    "#inplace True是修改的是原有的df\n",
    "#subset=[0]是指按第一列来删除,第一列有空值就删除对应的行\n",
    "print(df_data.dropna(subset=[0]))\n",
    "print('*'*50)\n",
    "print(df_data.dropna(subset=[2]))#第二列用两个nan,即删除2行\n",
    "# df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:41:15.832169Z",
     "start_time": "2025-01-07T13:41:15.824080Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.364324 -0.087416 -0.914237\n",
      "3  1.000000  2.000000  3.000000\n",
      "**************************************************\n",
      "          0         1         2\n",
      "0  0.364324 -0.087416 -0.914237\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 26
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T13:40:57.657722Z",
     "start_time": "2025-01-07T13:40:57.650877Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.364324 -0.087416 -0.914237\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.364324</td>\n",
       "      <td>-0.087416</td>\n",
       "      <td>-0.914237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 24
  },
  {
   "cell_type": "code",
   "source": [
    "#用的不多，用在某个特征缺失太多时，才会进行删除\n",
    "print(df_data.dropna(axis=1))  #某列由nan就删除该列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:41:33.778588Z",
     "start_time": "2025-01-07T13:41:33.773244Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0  0.364324 -0.087416 -0.914237\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 27
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T13:41:35.531449Z",
     "start_time": "2025-01-07T13:41:35.523065Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.364324 -0.087416 -0.914237\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.364324</td>\n",
       "      <td>-0.087416</td>\n",
       "      <td>-0.914237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 28
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 填充缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "#均值，中位数，众数填充"
  },
  {
   "cell_type": "code",
   "source": [
    "#给零列的空值填为-100，按特征（按列）去填充\n",
    "print(df_data.iloc[:,0].fillna(-100.))\n",
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:41:57.704814Z",
     "start_time": "2025-01-07T13:41:57.697595Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.364324\n",
      "3    1.000000\n",
      "Name: 0, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0  0.364324 -0.087416 -0.914237\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.364324</td>\n",
       "      <td>-0.087416</td>\n",
       "      <td>-0.914237</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 29
  },
  {
   "cell_type": "code",
   "source": [
    "#依次拿到每一列\n",
    "for i in df_data.columns:\n",
    "    print(df_data.loc[:,i])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T13:42:04.349262Z",
     "start_time": "2025-01-07T13:42:04.345429Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.364324\n",
      "3    1.000000\n",
      "Name: 0, dtype: float64\n",
      "0   -0.087416\n",
      "3    2.000000\n",
      "Name: 1, dtype: float64\n",
      "0   -0.914237\n",
      "3    3.000000\n",
      "Name: 2, dtype: float64\n"
     ]
    }
   ],
   "execution_count": 30
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T13:42:17.148386Z",
     "start_time": "2025-01-07T13:42:17.143762Z"
    }
   },
   "cell_type": "code",
   "source": "df_data.iloc[:,0].fillna(-100.,inplace=True) #inplace=True后面会被删除",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\顾笙丷\\AppData\\Local\\Temp\\ipykernel_19152\\2218614896.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
      "\n",
      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
      "\n",
      "\n",
      "  df_data.iloc[:,0].fillna(-100.,inplace=True) #inplace=True后面会被删除\n"
     ]
    }
   ],
   "execution_count": 31
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "# .mean()是求均值"
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T07:37:49.794275Z",
     "start_time": "2025-01-07T07:37:49.791340Z"
    }
   },
   "cell_type": "code",
   "source": "df_data.iloc[:,2]=df_data.iloc[:,2].fillna(df_data.iloc[:,2].mean()) #用均值填充空值(.mean()是求均值)",
   "outputs": [],
   "execution_count": 27
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T07:37:55.580904Z",
     "start_time": "2025-01-07T07:37:55.575901Z"
    }
   },
   "cell_type": "code",
   "source": "df_data",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "            0         1         2\n",
       "0   -1.264229 -0.514817  1.420473\n",
       "1    1.000000  2.000000  2.210236\n",
       "2 -100.000000  4.000000  2.210236\n",
       "3    1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.264229</td>\n",
       "      <td>-0.514817</td>\n",
       "      <td>1.420473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>2.210236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-100.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>2.210236</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 28
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
